"""
distinct算子：对RDD数据进行去重，返回新RDD
语法： rdd.distinct()
          无需参数
"""
from pyspark import SparkConf, SparkContext
import os
# Python interpreter PySpark should use. This is an absolute, machine-specific
# Windows path and has to be adjusted when the script runs on another machine.
os.environ['PYSPARK_PYTHON'] = "C:/Users/Lenovo/AppData/Local/Programs/Python/Python310/python.exe"

# Configure a local Spark run ("local[*]" master) under the app name "test_spark".
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

try:
    # Build an RDD from an in-memory list that contains repeated values.
    rdd = sc.parallelize([1, 1, 2, 2, 3, 2, 4, 5, 6, 5, 9])
    # distinct() produces a new RDD with the duplicates dropped; collect()
    # brings the result back to the driver as a list. The order of the
    # elements in the printed list should not be relied upon.
    print(rdd.distinct().collect())
finally:
    # Always shut the context down, even when the job above fails, so the
    # Spark resources held by this process are released.
    sc.stop()